#Set Working Directory -- The working directory should include the datasets hrrp_hc_did_ami_smpl.dta, hrrp_hc_did_hf_smpl.dta, hrrp_hc_did_pn_smpl.dta, qualmeasures.rds, and a folder named "Latex File" to capture output figures and tables

current_path = rstudioapi::getActiveDocumentContext()$path 
setwd(dirname(current_path))
print(getwd())

#Load Packages
library(haven)
library(plm)
library(tidyverse)
library(reshape2)
library(xtable)
library(reldist)
library(PanelMatch)
library(stringr)
library(reshape2)

#Load Packages-Placebo
if(!require(ggplot2)){install.packages("ggplot2")}
library(ggplot2)
if(!require(dplyr)){install.packages("dplyr")}
library(dplyr)
if(!require(stringr)){install.packages("stringr")}
library(stringr)
if(!require(reshape2)){install.packages("reshape2")}
library(reshape2)
if(!require(haven)){install.packages("haven")}
library(haven)
if(!require(stargazer)){install.packages("stargazer")}
library(stargazer)
if(!require(mvtnorm)){install.packages("mvtnorm")}
library(mvtnorm)
if(!require(boot)){install.packages("boot")}
library(boot)
if(!require(foreach)){install.packages("foreach")}
library(foreach)
if(!require(doParallel)){install.packages("doParallel")}
library(doParallel) 
if(!require(simcausal)){install.packages("simcausal")}
library(simcausal) 
if(!require(tidyverse)){install.packages("tidyverse")}
library(tidyverse)
if(!require(sandwich)){install.packages("sandwich")}
library(sandwich)
if(!require(multiwayvcov)){install.packages("multiwayvcov")}
library(multiwayvcov)
library(foreign)
if(!require(Matching)){install.packages("Matching")}
library(Matching)
if(!require(ebal)){install.packages("ebal")}
library(ebal)
if(!require(MatchIt)){install.packages("MatchIt")}
library(MatchIt)
if(!require(MatchingFrontier)){install.packages("MatchingFrontier")}
library(MatchingFrontier)
library(xtable)
if(!require(xtable)){install.packages("xtable")}
if(!require(reshape2)){install.packages("reshape2")}
library(reshape2)
if(!require(gdata)){install.packages("gdata")}
library(gdata)
if(!require(Amelia)){install.packages("Amelia")}
library(Amelia)
library(haven)
library(plm)
library(Deriv)
library(tidyverse)
library(ggplot2)
library(reshape2)
library(xtable)
library(reldist)
library(rstudioapi)
library(data.table)

#Set names of datasets by condition
dataset.list <- c("ami" = "hrrp_hc_did_ami_smpl.dta", "hf" = "hrrp_hc_did_hf_smpl.dta", "pn" = "hrrp_hc_did_pn_smpl.dta")

#####################
# Replicate Table 1 #
#####################

#Calculate Summary Statistics for Each Condition
Table_1 <- sapply(dataset.list, function(i){
  
  #Set condition dataset
  df<-read_dta(i)
  
  #Calculate summary statistics by hospital
  hospital_char<-df %>%
    group_by(prvdr_num) %>%
    summarise(n=n(), rate30d_mean=mean(rate), beds_mean=mean(beds),dshpct_mean=mean(dshpct),
              teach_percent=mean(teach)*100, urban_percent=sum(geo==1|geo==2)/sum(!(is.na(geo))),
              meaningful_use=max(mu2r), aco=max(aco2r), bundles=max(bpci2r))
  
  #Calculate summary statistics across all hospitals
  hospital_char_sum<-hospital_char %>%
    summarise(hospitals=0, observations=0, rate_30_day=mean(rate30d_mean)*100, rate_30_day_sd=0, beds=mean(beds_mean),
              beds_sd=sd(beds_mean), dsh=mean(dshpct_mean), dsh_sd=sd(dshpct_mean),
              teach=mean(teach_percent), urban=mean(urban_percent)*100, vol_initit=0,
              mu=mean(meaningful_use)*100, aco=mean(aco)*100, bundles=mean(bundles)*100
    )
  
  #Calculate standard deviation of readmit rate across hospitals
  hospital_char_sum$rate_30_day_sd<-sd(df$rate)*100
  
  #Count number of hospitals
  hospital_char_sum$hospitals<-sum(!(is.na(hospital_char$prvdr_num)))
  
  #Count number of observations (8 per hospital)
  hospital_char_sum$observations<-hospital_char_sum$hospitals*8
  
  #Return Results
  return(t(round(hospital_char_sum,4)))
})

#Format table for export
Table_1=as.data.frame(Table_1)
Table_1[11,]=""
Characteristic_names<-c("Hospitals, No", "Hospital-year observations, No", "30-day risk standardized observations rate, mean", "30-day risk standardized observations rate, sd", "Beds, mean", "Beds, sd", "Disproportionate Share Index, mean", "Disproportionate Share Index, sd", "Teaching hospital, %", "Hospital located in urban area, %", "Hospital participating during any time in study period, %", "Meaningful use of health information technology", "Accountable care organization programs", "Bundled Payment for Care Initiative")
Table_1<-add_column(Table_1, Characteristic_names, .before = "ami")
Table_1 <-Table_1 %>% 
  rename(AMI = ami, 'Heart Failure'=hf, Pneumonia=pn)

#Export table to Latex
print(xtable(Table_1, label = "table1"), file = "Latex File/table_1.tex", floating = FALSE)

######################
# Replicate Figure 1 #
######################

hosp_particp_all_years <- read_dta(dataset.list["pn"]) %>% #Figure only includes information for pneumonia readmission
  group_by(prvdr_num) %>% #Group by hospital
  filter(n()==8) %>% #Only include hospitals with all 8 observations
  filter(msr_end_yr>2009) %>% #Only include observations past 2009
  group_by(prvdr_num, msr_end_yr) %>% #Group by hospital and year
  summarise(no_progs=sum(all(aco2r==0,mu2r==0,bpci2r==0)),
            mu_only=sum(all(aco2r==0,mu2r==1,bpci2r==0)),
            mu_and_aco=sum(all(aco2r==1,mu2r==1,bpci2r==0)),
            aco_only=sum(all(aco2r==1,mu2r==0,bpci2r==0)),
            other=sum(all(aco2r==1,mu2r==1,bpci2r==1))
            +sum(all(aco2r==0,mu2r==0,bpci2r==1))
            +sum(all(aco2r==0,mu2r==1,bpci2r==1)) )%>% #Count participation across combinations of voluntary programs for each hospital/year combination
  group_by(msr_end_yr) %>% #Change group to year only
  summarise(no_progrs=sum(no_progs),
            mu_only=sum(mu_only),
            mu_and_aco=sum(mu_and_aco),
            aco_only=sum(aco_only),
            other=sum(other)
  ) #Count participation across combinations of voluntary programs for each year

#Reformat Dataframe
hosp_particp_all_years<-t(hosp_particp_all_years)
colnames(hosp_particp_all_years) = hosp_particp_all_years[1, ] # the first row will be the header
hosp_particp_all_years = as.data.frame(hosp_particp_all_years[-1, ]) # removing the first row

#Calculate percentage participation from counts and melt rows into summary dataframe
total_hospitals<-sum(hosp_particp_all_years$'2010')
hosp_particp_all_years_w_percents<-(hosp_particp_all_years/total_hospitals)*100
hosp_particp_all_years_w_percents <- rownames_to_column(hosp_particp_all_years_w_percents, var='Voluntary_Programs')
hosp_particp_melted<-as.data.frame(melt(data = hosp_particp_all_years_w_percents, id.vars = c("Voluntary_Programs")))

#Rename variables
hosp_particp_melted <-hosp_particp_melted %>% 
  rename(Year = variable, Hospital_Percentage=value)

#Reformat data for ggplot
hosp_particp_melted$Voluntary_Programs<-factor(hosp_particp_melted$Voluntary_Programs,levels=c("no_progrs", "mu_only","mu_and_aco","aco_only","other"),labels=c(" No Programs  "," MU only  "," MU and ACO  "," ACO only  "," Other  ")
)

#Create figure
Fig_1 <- ggplot(hosp_particp_melted, aes(y=Hospital_Percentage, x=Voluntary_Programs, color=Voluntary_Programs, fill=Voluntary_Programs)) + geom_bar(stat="identity") + facet_grid(~Year, switch='x') + xlab('')+ylab('Hospital, %') + ggtitle('Figure 1. Participation in Voluntary Reforms, 2010-2015, by 2837 Hospitals')+theme(axis.text.x=element_blank()) +  theme(legend.position="top") + theme(strip.placement = "outside") + 
  theme(strip.background =element_rect(fill="white"))+theme(legend.title=element_blank()) +
  theme(axis.line = element_line(colour = "black"),
        panel.grid.major  = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_blank())

ggsave(file="figure_1.pdf",Fig_1,width=11,height=8.5,units = "in",path="Latex File")

######################
# Replicate Figure 2 #
######################

#Set list of years to be included in figure
msr_end_yr<-c(2008,2009,2010,2011,2012,2013,2014,2015)

#Calculate readmission rate for each year/condition combination
Table_Fig_2 <- lapply(dataset.list, function(i,msr_end_yr){
  df<-read_dta(i) 
  i_am_rate<-df %>%
    group_by(msr_end_yr) %>%
    summarise(rate=mean(rate*100))
  return(i_am_rate)
})

#Reformat into dataframe, remove redundant information, and rename columns
Table_Fig_2<-as.data.frame(Table_Fig_2)
Table_Fig_2 <-Table_Fig_2 %>% 
  select(-hf.msr_end_yr,-pn.msr_end_yr) %>% 
  rename(Heart_Failure =hf.rate, Pneumonia=pn.rate, AMI=ami.rate, Year=ami.msr_end_yr)

#Melt rows into single summary dataframe and format date variable
Table_Fig_2_melted<-as.data.frame(melt(data = Table_Fig_2,id.vars="Year"))
Table_Fig_2_melted$Year<-as.Date(as.character(Table_Fig_2_melted$Year), "%Y")

#Create figure
Figure_2<-
  
  ggplot(data=Table_Fig_2_melted, aes(x=Year, y=value, group=variable, color=variable)) + geom_line(size=2)+ylim(10,25)+
  xlab('Year')+ylab('Readmission, %') + ggtitle('Figure 2: Thirty-Day Readmission Rates, 2008-2015')+
  scale_x_date(breaks = Table_Fig_2_melted$Year,date_labels= " %Y") + 
  theme(axis.text.x = element_text(angle = 0))+ theme_classic()+
  theme( # remove the vertical grid lines
    panel.grid.major.x = element_blank() ,
    panel.grid.major.y = element_line( size=.1, color="grey"),
    legend.title=element_blank() ) +
  
  geom_vline(xintercept=as.numeric(as.Date("2010-01-01")),linetype=3,size=1,color='black')

ggsave(file="figure_2.pdf",Figure_2,width=11,height=8.5,units = "in",path="Latex File")

#####################
# Replicate Table 2 #
#####################

#Define function to calculate marginal effects for a fixed effects panel data model that replicates the Stata margins command (R does not have a built-in margins function for panel data)
##The arguments for this function are:
### model: formula object for representing the linear model
### mvar: variable name in the model for which to calculate the marginal effect
### data: dataframe object containing observations for the variables included in the model
### scale: scalar transformation to apply to final marginal effect estimate
### group: name of cluster variable in the dataset
### set_covariates: named list of values at which to set covariates included in the model when calculating marginal effects -- default is to set the covariates at their mean, weighted by the mvar variable
margins_plm <- function(model, mvar, data, scale, group, set_covariates = NULL){
  #Format dataset as panel data
  capture.output(invisible(data <- pdata.frame(data, index = group)))
  #Fit fixed effects model
  fit <- plm(formula = model, data = data, model = "within")
  #Create list of names for each coefficient in the fitted model
  beta_list <- paste("beta", seq(from = 1, to = length(fit$coefficients), by = 1), sep = "")
  
  #Calculate the number of groups in the data
  number_groups <- length(unique(data[,group]))
  #Calculate the number of terms in the model
  number_terms <- length(beta_list)
  #Calculate the number of observations in the model
  number_observations <- nrow(data)
  #Calculate the clustered variance-covariance matrix of the coefficients in the fitted model, using the same degrees of freedom adjustment applied by Stata
  var.covar <- ((number_groups)/(number_groups - 1))*(number_observations/(number_observations - number_terms - number_groups))*vcovHC(fit, type = "HC0", cluster = "group")
  
  #Define scalar variables representing each coefficient in the model and set their value at the coefficient estimates
  for (i in 1:length(beta_list)){
    assign(beta_list[i],fit$coefficients[i])
  }
  
  #Create list of names for each variable included in the model (except the dependent variable)
  model.variables <- all.vars(model)[2:length(all.vars(model))]
  #Create list of means for each variable included in the model
  variable.means <- apply(select(data, model.variables),2,weighted.mean, w = data[,mvar])
  #Define scalar variables representing each variable in the model and set their value at their respective mean
  for (i in 1:length(model.variables)){
    assign(model.variables[i], as.numeric(variable.means[i]))
  }
  
  #Identify list of variables that is to be set to the values included in the set_covariates argument
  reset_covariates <- which(model.variables %in% names(set_covariates))
  #Set variables to their value in the set_covariates argument
  for (i in reset_covariates){
    assign(model.variables[i], as.numeric(set_covariates[model.variables[i]]))
  }
  
  #Create list of terms included in the model
  model.terms <- attr(terms.formula(model), "term.labels")
  #Change syntax of interaction terms so that the model can be evaluated in R
  model.terms <- str_replace_all(model.terms,":","*")
  #Add in coefficient variables to represent the final fitted model
  model.terms <- paste(model.terms, beta_list, sep = "*")
  #Collapse terms to create a final model formula
  model.formula <- paste(model.terms, collapse = " + ")
  #Parse string so that formula can be evaluated by R
  model.function <- parse(text = model.formula)
  #Calculate the marginal effect of the mvar argument (scaled by the scale argument)
  model.margin <- scale*eval(D(model.function, mvar))
  
  #Calculate the gradient of the marginal effect formula with respect to the coefficients
  model.margin.gradient <- as.numeric(attr(eval(deriv(D(model.function, mvar),beta_list)), "gradient"))
  
  #Apply delta method to estimate standard error of marginal effect (scaled by the scale argument)
  margin.se <- scale*sqrt(as.numeric(t(model.margin.gradient) %*% var.covar %*% model.margin.gradient))
  
  #Calculate confidence interval from estimated standard error
  upper.ci <- model.margin + qnorm(0.975)*margin.se
  lower.ci <- model.margin - qnorm(0.975)*margin.se
  
  #Package point estimate and confidence interval into final result list
  result <- c(Estimate = model.margin, Lower.95.CI = lower.ci, Upper.95.CI = upper.ci)
  
  return(result)
}

#Define function to calculate the first difference of the marginal effect for a fixed effects panel data model between two levels of covariates that again replicates the Stata margins command (R does not have a built-in margins function for panel data)
##The arguments for this function are the same as the margins_plm function defined above, except it allows covariates to be set at two different levels, and calculates the difference between the marginal effect at each level
margins_plm_delta <- function(model, mvar, data, scale, group, set_covariates.0, set_covariates.1){
  capture.output(invisible(data <- pdata.frame(data, index = group)))
  fit <- plm(formula = model, data = data, model = "within")
  beta_list <- paste("beta", seq(from = 1, to = length(fit$coefficients), by = 1), sep = "")
  
  number_groups <- length(unique(data[,group]))
  number_terms <- length(beta_list)
  number_observations <- nrow(data)
  
  var.covar <- ((number_groups)/(number_groups - 1))*(number_observations/(number_observations - number_terms - number_groups))*vcovHC(fit, type = "HC0", cluster = "group")
  
  for (i in 1:length(beta_list)){
    assign(beta_list[i],fit$coefficients[i])
  }
  
  env.0 <- new.env()
  env.1 <- new.env()
  
  model.variables <- all.vars(model)[2:length(all.vars(model))]
  
  variable.means <- apply(select(data, model.variables),2,weighted.mean, w = data[,mvar])
  
  for (i in 1:length(model.variables)){
    assign(model.variables[i], as.numeric(variable.means[i]), envir = env.0)
    assign(model.variables[i], as.numeric(variable.means[i]), envir = env.1)
  }
  
  reset_covariates <- which(model.variables %in% names(set_covariates.1))
  
  for (i in reset_covariates){
    assign(model.variables[i], as.numeric(set_covariates.0[model.variables[i]]), envir = env.0)
    assign(model.variables[i], as.numeric(set_covariates.1[model.variables[i]]), envir = env.1)
  }
  
  model.terms <- attr(terms.formula(model), "term.labels")
  model.terms <- str_replace_all(model.terms,":","*")
  model.terms <- paste(model.terms, beta_list, sep = "*")
  model.formula <- paste(model.terms, collapse = " + ")
  model.function <- parse(text = model.formula)
  model.margin <- scale*(eval(D(model.function, mvar), envir = env.1) - eval(D(model.function, mvar), envir = env.0))
  
  model.margin.gradient <- as.numeric(attr(eval(deriv(D(model.function, mvar),beta_list), envir = env.1), "gradient")) - as.numeric(attr(eval(deriv(D(model.function, mvar),beta_list), envir = env.0), "gradient"))
  
  margin.se <- scale*sqrt(as.numeric(t(model.margin.gradient) %*% var.covar %*% model.margin.gradient))
  
  upper.ci <- model.margin + qnorm(0.975)*margin.se
  
  lower.ci <- model.margin - qnorm(0.975)*margin.se
  
  result <- c(Estimate = model.margin, Lower.95.CI = lower.ci, Upper.95.CI = upper.ci)
  
  return(result)
}

#Define a function that will package the results of the margins_plm and margins_plm_delta function into a format that matches the results included in Ryan et al.
table.present <- function(margin.result){
  result <- paste(sprintf("%.2f", round(margin.result[1],2)), " (",sprintf("%.2f", round(margin.result[2],2)), ", ", sprintf("%.2f", round(margin.result[3],2)),")",collapse = "")
  return(result)
}

#Define the model used by Ryan et al.
model <- formula(rate ~ msr_end_yr + post_treat + post_treat:(mu2r + aco2r + bpci2r) + post_treat:(mu2r:aco2r + bpci2r:aco2r + mu2r:bpci2r) + post_treat:mu2r:bpci2r:aco2r + post_treat:(teach + beds + dshpct + urban))

#Define different combinations of participation in voluntary programs
covariate_levels <- data.frame(mu2r = c(0,1,0,1,1,1), aco2r = c(0,0,1,0,1,1), bpci2r = c(0,0,0,1,0,1))
rownames(covariate_levels) <- c("None", "MU.Only", "ACO.Only", "MU.BPCI", "MU.ACO", "All")

#Define scale variable for the impact of the HRRP on readmission rates -- multiply by 12 to convert months to years and by 100 to convert to percentage points
yr.percent <- 12*100

#Calculate results across all conditions for overall (i.e., all covariates at mean) and baseline (i.e., no participation in voluntary programs) contexts
results_table.2 <- data.frame(AMI = c(as.character(nrow(read_dta(dataset.list["ami"]))),table.present(margins_plm(model, "post_treat", read_dta(dataset.list["ami"]), yr.percent, "prvdr_num")), table.present(margins_plm(model, "post_treat", read_dta(dataset.list["ami"]), yr.percent, "prvdr_num", covariate_levels[1,]))),
                              "Heart Failure" = c(as.character(nrow(read_dta(dataset.list["hf"]))),table.present(margins_plm(model, "post_treat", read_dta(dataset.list["hf"]), yr.percent, "prvdr_num")),  table.present(margins_plm(model, "post_treat", read_dta(dataset.list["hf"]), yr.percent, "prvdr_num", covariate_levels[1,]))),
                              "Pneumonia" = c(as.character(nrow(read_dta(dataset.list["pn"]))),table.present(margins_plm(model, "post_treat", read_dta(dataset.list["pn"]), yr.percent, "prvdr_num")), table.present(margins_plm(model, "post_treat", read_dta(dataset.list["pn"]), yr.percent, "prvdr_num", covariate_levels[1,]))), stringsAsFactors = FALSE)

#Calculate first difference effect on marginal effect of HRRP from switching from baseline (i.e., no participation in voluntary programs) to various combinations of participation in voluntary programs
for(i in 2:nrow(covariate_levels)){
  covariate_level <- covariate_levels[i,]
  results_table.2[i+2,] <- c(table.present(margins_plm_delta(model, "post_treat", read_dta(dataset.list["ami"]), yr.percent, "prvdr_num", covariate_levels[1,], covariate_level)),
                             table.present(margins_plm_delta(model, "post_treat", read_dta(dataset.list["hf"]), yr.percent, "prvdr_num", covariate_levels[1,], covariate_level)),
                             table.present(margins_plm_delta(model, "post_treat", read_dta(dataset.list["pn"]), yr.percent, "prvdr_num", covariate_levels[1,], covariate_level)))
}

#Package and export results to Latex table
rownames(results_table.2) <- c("Observations", "Overall HRRP Estimate", rownames(covariate_levels))
print(xtable(results_table.2, label = "table2"), file = "Latex File/table_2.tex", floating = FALSE)

#####################
# Replicate Table 3 #
#####################

# initializing the list of results
results_list = vector(mode = "list", length = 3)

# checking the name of the dataset
names(dataset.list[1])

# running the loop to generate the matrices
for (data_index in 1:length(dataset.list))
{
  name = names(dataset.list[data_index]) # getting the name of the dataset
  dataset = read_dta(dataset.list[name]) # reading in the dataset
  
  # setting the program var to 1
  dataset$program = 1
  
  # subsetting the data only for treatment groups
  dataset_treat = dataset[which(dataset$treat == 1),]
  
  # defining the model
  model = as.formula("rate~msr_end_yr+post_treat+post_treat:(mu2r+aco2r+bpci2r+teach+beds+dshpct+urban)+post_treat:mu2r:aco2r+post_treat:mu2r:bpci2r+post_treat:aco2r:bpci2r+post_treat:mu2r:aco2r:bpci2r")
  
  ### calculating the weighted means:
  # array of variables to calculate means from
  model_var = c('msr_end_yr', 'post_treat', 'mu2r', 'aco2r', 'bpci2r', 'teach', 'beds', 'dshpct', 'urban')
  
  # defining wt_mean_arr
  model.variables <- all.vars(model)[2:length(all.vars(model))]
  wt_mean_arr = array(dim = length(model.variables))
  
  # for-loop for calculating weighted mean for each variable
  for (k in 1:length(wt_mean_arr))
  {
    wt_mean_arr[k] = weighted.mean(x = as.matrix(dataset_treat[,model.variables[k]]),
                                   w = as.matrix(dataset_treat[,'post_treat']))
  }
  
  # printing out the weighted means
  names(wt_mean_arr) = model.variables
  wt_mean_arr
  
  # further defining the quantile variables for beds and dshpct
  beds_25 = wtd.quantile(x = dataset_treat$beds, 
                         q = 0.25, 
                         weight=dataset_treat$post_treat)
  
  beds_75 = wtd.quantile(x = dataset_treat$beds, 
                         q = 0.75, 
                         weight=dataset_treat$post_treat)
  
  dshpct_25 = wtd.quantile(x = dataset_treat$dshpct, 
                           q = 0.25, 
                           weight=dataset_treat$post_treat)
  
  dshpct_75 = wtd.quantile(x = dataset_treat$dshpct, 
                           q = 0.75, 
                           weight=dataset_treat$post_treat)
  
  ### defining different covariate matrices:
  # 1. time variable  
  msr_end_yr_arr = c(rep(wt_mean_arr['msr_end_yr'], 15))
  
  # 2. post_treat variable
  post_treat_arr = c(rep(wt_mean_arr['post_treat'], 15))
  
  # 3. mu2r variable
  mu2r_arr = c(rep(wt_mean_arr['mu2r'], 9),
               0, 1, 0, 1, 1, 1)
  
  # 4. aco2r variable
  aco2r_arr = c(rep(wt_mean_arr['aco2r'], 9),
                0, 0, 1, 1, 0, 1)
  
  # 5. bpci2r variable
  bpci2r_arr = c(rep(wt_mean_arr['bpci2r'], 9),
                 0, 0, 0, 0, 1, 1)
  
  # 6. teach variable
  teach_arr = c(wt_mean_arr['teach'], 
                0, 
                1, 
                rep(wt_mean_arr['teach'], 12))
  
  # 7. urban variable
  urban_arr = c(rep(wt_mean_arr['urban'], 3),
                0,
                1,
                rep(wt_mean_arr['urban'], 10))
  
  # 8. beds variable
  beds_arr = c(rep(wt_mean_arr['beds'], 5),
               beds_25,
               beds_75,
               rep(wt_mean_arr['beds'], 8))
  
  # 9. dshpct variable
  dshpct_arr = c(rep(wt_mean_arr['dshpct'], 7),
                 dshpct_25,
                 dshpct_75,
                 rep(wt_mean_arr['dshpct'], 6))
  
  # putting the covariates tgt in a dataframe
  covariate_levels = data.frame(msr_end_yr = msr_end_yr_arr,
                                post_treat = post_treat_arr,
                                mu2r = mu2r_arr,
                                aco2r = aco2r_arr,
                                bpci2r = bpci2r_arr,
                                teach = teach_arr,
                                beds = beds_arr,
                                dshpct = dshpct_arr,
                                urban = urban_arr)
  
  # # printing the covariate_levels
  # print(covariate_levels)
  
  # running the function
  results_mat = matrix(nrow = nrow(covariate_levels), 
                       ncol = 3)
  
  # for-loop that stores the results for each covariate variation
  for (i in 1:nrow(results_mat))
  {
    print(paste("Iterating at iter num: ", as.character(i),
                " for dataset ", names(dataset.list[data_index])))
    results_mat[i,] = margins_plm(model = model, 
                                  mvar = 'post_treat',
                                  data = dataset_treat, 
                                  scale = 12*100,
                                  set_covariates = covariate_levels[i,],
                                  group = 'prvdr_num')
    
  }
  
  # reversing the matrix
  rev_results_mat = apply(results_mat, 2, rev)
  
  # setting rownames for the resulting matrix
  var_names = c('All Programs',
                'Meaningful use and BPCI',
                'Meaningful use and ACO',
                'ACO only',
                'Meaningful use only',
                'No programs',
                'High DSH',
                'Low DSH',
                'Large hospitals',
                'Small hospitals',
                'Urban',
                'Rural',
                'Teach=Y',
                'Teach=N',
                'Overall')
  
  # assigning names to each confidence interval info.
  rownames(rev_results_mat) = var_names
  
  # # printing out the results matrix
  # rev_results_mat
  
  # storing the results in the results list
  results_list[[data_index]] = rev_results_mat
}

# take a glance at the list
results_list

## using rbind to combine the lists before
# converting it to a matrix format
results_rbind = rbind(results_list[[1]],
                      results_list[[2]],
                      results_list[[3]])

# converting the results into a matrix format
results_mat <- matrix(results_rbind, 
                      nrow = length(var_names)*3,
                      ncol = 3, byrow = FALSE)

# filling in row and column names
rownames(results_mat) = rep(var_names,3)
colnames(results_mat) = c('mean', 'lower', 'upper')

# turning the results into a df
results_df = as.data.frame(results_mat)
results_df$names = rownames(results_df)

results_df$dat = rownames(results_df) # initializing

# adding 95% CI information in the ylabels
for (k in 1:length(dataset.list))
{
  for (m in 1:length(var_names))
  {
    # print(length(var_names)*(k-1) + m)
    index = as.numeric(length(var_names)*(k-1) + m)
    # print(names(data_list[k]))
    str = as.character(names(dataset.list[k]))
    print(var_names[m])
    results_df$dat[index] = str
    results_df$names[index] = paste(as.character(var_names[m]),
                                    ": ",
                                    as.character(format(round(results_df$mean[index],2), nsmall = 2)),
                                    " (",
                                    as.character(format(round(results_df$lower[index],2), nsmall = 2)),
                                    ", ",
                                    as.character(format(round(results_df$upper[index],2), nsmall = 2)),
                                    ")",
                                    sep = "")
  }
}

# adding header labels: AMI, HEART FAILURE, and PNEUMONIA
ami_label = data.frame(mean = NA, lower = NA, upper = NA,
                       names = "AMI", dat = NA, stringsAsFactors = FALSE)
hf_label = data.frame(mean = NA, lower = NA, upper = NA,
                      names = "HEART FAILURE", dat = NA, stringsAsFactors = FALSE)
pn_label = data.frame(mean = NA, lower = NA, upper = NA,
                      names = "PNEUMONIA", dat = NA, stringsAsFactors = FALSE)
space_filler = data.frame(mean = NA, lower = NA, upper = NA,
                          names = " ", dat = NA, stringsAsFactors = FALSE)
space_filler_2 = data.frame(mean = NA, lower = NA, upper = NA,
                            names = "  ", dat = NA, stringsAsFactors = FALSE)

# combining the above labels with the data below
results_df_test = rbind(ami_label, results_df[1:15,], space_filler,
                        hf_label, results_df[16:30,], space_filler_2,
                        pn_label, results_df[31:45,])

# factorizing the names before plotting on ggplot
results_df_test$names = factor(results_df_test$names, results_df_test$names)

##### plotting the ggplot #####
figure_3 <- ggplot(data = results_df_test, aes(y = c(mean),
                                   x = reorder(names, desc(names)))) +
  geom_errorbar(aes(ymin = lower, ymax = upper), na.rm = TRUE) +
  geom_point(stat = "identity", na.rm = TRUE) +
  ylab('Estimates of HRRP') +
  xlab("Hospital Characteristic") +
  coord_flip()

ggsave(file="figure_3.pdf",figure_3,width=8.5,height=11,units = "in",path="Latex File")

####################
##                ##
##Extensions Begin##
##                ##
####################

############################################################################################################
#################################### EXTENSION - Matching ##################################################
############################################################################################################

#Function to Prepare Dataset for Use in PanelMatch, Weight Outcome by Time from HRRP Implementation, and add interaction terms
data.prepare <- function(data, time.var, unit.var, outcome.var, weight.var, weight, implementation.time, treatment.vars){
  data[,c(time.var)] <- lapply(data[,c(time.var)], as.integer)
  data[,c(unit.var)] <- lapply(data[,c(unit.var)], as.integer)
  data <- as.data.frame(data)
  
  data[which(data[,c(time.var)] >= implementation.time),c(paste(outcome.var,"weighted",sep="."))] <- (weight/data[which(data[,c(time.var)] >= implementation.time),c(weight.var)])*data[which(data[,c(time.var)] >= implementation.time),c(outcome.var)]
  data[which(data[,c(time.var)] < implementation.time),c(paste(outcome.var,"weighted",sep="."))] <- data[which(data[,c(time.var)] < implementation.time),c(outcome.var)]
  
  treatment.vars.restrict <- treatment.vars
  
  for(var.name1 in treatment.vars){
    treatment.vars.restrict <- setdiff(treatment.vars.restrict, var.name1)
    for(var.name2 in treatment.vars.restrict){
      data[,c(paste(var.name1,var.name2,sep="."))] <- data[,c(var.name1)]*data[,c(var.name2)]
    }
  }
  
  data[,c(paste(treatment.vars,collapse="."))] <- apply(data[,treatment.vars], 1, prod)
  
  return(data)
}

#Define function that uses PanelMatch and PanelEstimate to estimate and package the difference-in-differences estimator
estimate.effect <- function(data, lag, covariate.formula, treatment.var, time.var, unit.var, outcome.var){
  pm <- PanelMatch(lag = lag, time.id = time.var, unit.id = unit.var, treatment = treatment.var, refinement.method = "mahalanobis", data = data, outcome.var = outcome.var, qoi = "att", covs.formula = covariate.formula, lead = 0, size.match = 10)
  pe <- PanelEstimate(inference = "bootstrap", sets = pm, data = data)
  result <- 100*c(pe$coefficients, pe$coefficients + qnorm(0.975)*pe$standard.error, pe$coefficients - qnorm(0.975)*pe$standard.error)
  names(result) <- c("Point.Estimate", "CI.95.Upper", "CI.95.Lower")
  return(result)
}

#Set the covariates used to refine matched sets with PanelMatch
covariate.model <- as.formula(~  lag("rate", 1:3) + teach + beds + dshpct + urban)

#Set list of different treatments to be tested
treatment.list <- c("aco2r", "mu2r", "bpci2r", "aco2r.mu2r", "mu2r.bpci2r", "aco2r.bpci2r", "aco2r.mu2r.bpci2r")

#Cacluate difference-in-differences estimator for the three conditions of interest, across the list of different treatments
for(i in 1:length(dataset.list)){
  data.name <- names(dataset.list)[i]
  data.temp <- data.prepare(data = read_dta(dataset.list[i]), time.var =  "msr_end_yr", unit.var = "prvdr_num",   outcome.var =  "rate", weight.var =   "post_treat", weight =  12, implementation.time =   2010, treatment.vars =  c("aco2r", "mu2r", "bpci2r"))
  effect.results <- data.frame(Point.Estimate = numeric(0), CI.95.Upper = numeric(0), CI.95.Lower = numeric(0))
  
  for(j in 1:length(treatment.list)){
    effect.temp <- estimate.effect(data.temp, 3, covariate.model, treatment.list[j], "msr_end_yr", "prvdr_num", "rate.weighted")
    effect.results[j,] <- effect.temp
    row.names(effect.results)[j] <- treatment.list[j]
  }
  
  attr(effect.results, "condition") <- names(dataset.list[i])
  
  assign(paste("effect.results", names(dataset.list)[i], sep = "."), effect.results)
  
}

#Calculate same estimates as previous for-loop, but without applying the weighting scheme
for(i in 1:length(dataset.list)){
  data.name <- names(dataset.list)[i]
  data.temp <- data.prepare(data = read_dta(dataset.list[i]), time.var =  "msr_end_yr", unit.var = "prvdr_num",   outcome.var =  "rate", weight.var =   "post_treat", weight =  12, implementation.time =   2010, treatment.vars =  c("aco2r", "mu2r", "bpci2r"))
  effect.results <- data.frame(Point.Estimate = numeric(0), CI.95.Upper = numeric(0), CI.95.Lower = numeric(0))
  
  for(j in 1:length(treatment.list)){
    effect.temp <- estimate.effect(data.temp, 3, covariate.model, treatment.list[j], "msr_end_yr", "prvdr_num", "rate")
    effect.results[j,] <- effect.temp
    row.names(effect.results)[j] <- treatment.list[j]
  }
  
  attr(effect.results, "condition") <- names(dataset.list[i])
  
  assign(paste("effect.results.unweighted", names(dataset.list)[i], sep = "."), effect.results)
  
}

#Define function to package results from the above for-loops for output to LaTex
package.results <- function(results.list){
  output <- data.frame()
  
  for(i in 1:length(results.list)){
    result.format <- apply(results.list[i][[1]], 1, function(x){paste(round(x[1], digits=2), " (", round(x[2], digits = 2), ", ", round(x[3], digits = 2), ")", sep = "")})
    output[1:7,i] <- result.format
    colnames(output)[i] <- attr(results.list[i][[1]], "condition")
  }
  
  rownames(output) <- names(result.format)
  
  return(output)
  
}

#Package results from for-loops and output to LaTex
weighted.results <- package.results(list(effect.results.ami, effect.results.hf, effect.results.pn))
unweighted.results <- package.results(list(effect.results.unweighted.ami, effect.results.unweighted.hf, effect.results.unweighted.pn))

print(xtable(weighted.results, label = "matchWeighted"), file = "Latex File/matching_weighted_results.tex", floating = FALSE)
print(xtable(unweighted.results, label = "matchUnweighted"), file = "Latex File/matching_unweighted_results.tex", floating = FALSE)

#Define function to estimate difference-in-differences estimator for leading periods after treatment
estimate.effect.lead <- function(data, lag, lead, covariate.formula, treatment.var, time.var, unit.var, outcome.var){
  pm <- PanelMatch(lag = lag, time.id = time.var, unit.id = unit.var, treatment = treatment.var, refinement.method = "mahalanobis", data = data, outcome.var = outcome.var, qoi = "att", covs.formula = covariate.formula, lead = lead, size.match = 10)
  pe <- PanelEstimate(inference = "bootstrap", sets = pm, data = data)
  return(pe)
}

#Calculate difference-in-differences estimator for leading periods and export plots
for(i in 1:length(dataset.list)){
  data.name <- names(dataset.list)[i]
  data.temp <- data.prepare(data = read_dta(dataset.list[i]), time.var =  "msr_end_yr", unit.var = "prvdr_num",   outcome.var =  "rate", weight.var =   "post_treat", weight =  12, implementation.time =   2010, treatment.vars =  c("aco2r", "mu2r", "bpci2r"))
  effect.results <- data.frame(Point.Estimate = numeric(0), CI.95.Upper = numeric(0), CI.95.Lower = numeric(0))
  
  for(j in 1:length(treatment.list)){
    effect.temp <- estimate.effect.lead(data.temp, 3, 0:2, covariate.model, treatment.list[j], "msr_end_yr", "prvdr_num", "rate.weighted")
    pdf(paste("Latex File/", names(dataset.list)[i], "_", treatment.list[j], "_plot.pdf", sep = ""))
    par(mar=c(4,4,2,4))
    plot(effect.temp, main = "")
    dev.off()
  }
}

#Generate Coavariate Balance Plots for the Different Treatments on the AMI data
for(i in 1:length(treatment.list)){
  data.temp <- data.prepare(data = read_dta(dataset.list["ami"]), time.var =  "msr_end_yr", unit.var = "prvdr_num",   outcome.var =  "rate", weight.var =   "post_treat", weight =  12, implementation.time =   2010, treatment.vars =  c("aco2r", "mu2r", "bpci2r"))
  pm <- PanelMatch(lag = 3, time.id = "msr_end_yr", unit.id = "prvdr_num", treatment = treatment.list[i], refinement.method = "mahalanobis", data = data.temp, outcome.var = "rate.weighted", qoi = "att", covs.formula = covariate.model, lead = 0, size.match = 10)
  
  balance.temp <- as.data.frame(get_covariate_balance(pm$att, data = data.temp, covariates = c("rate", "teach", "beds", "dshpct", "urban"), plot = F))
  
  balance.temp$time <- lapply(rownames(balance.temp), function(x){gsub("_","-",x)})
  
  balance.temp <- as.data.frame(lapply(balance.temp, unlist))
  balance.temp <- melt(balance.temp)
  
  plot.temp <- ggplot(data = balance.temp, aes(x = time, y = value, group = variable)) + geom_line(aes(color=variable)) + scale_colour_discrete(name  ="Matched Covariates", breaks=c("rate", "teach", "beds", "dshpct", "urban"), labels=c("Readmission Rate", "Teaching Hospital", "Number of Beds", "Disproportionate Share %", "Urban Hospital")) + labs(x="Time before Implementation", y="Difference between Treated and Controls (SD)") + scale_x_discrete(limits = c("t-3", "t-2", "t-1", "t-0")) + theme(legend.position="right")
  
  ggsave(file=paste("ami", "_", treatment.list[i], "_balance_plot.pdf", sep = ""),plot.temp,width=11,height=8.5,units = "in",path="Latex File")
}

############################################################################################################
#################################### EXTENSION - PLACEBO ###################################################
############################################################################################################

#data Wrangling
### COMMENT: The commented portion below (lines 788-1117) are used to
# prepare the data file "qualmeasures.rds", which we make available.

### PLEASE PROCEED TO LINE 1117!


#################### DATA WRANGLING #######################
persistent_qual_measures<-c("AMI_1","AMI_7","AMI_7A","AMI_8","AMI_8A","HF_2","SCIP_1","SCIP_2","SCIP_3","SCIP_VTE_2")

# df_2008<-read.csv("2008.csv")
# # Adding Missing Quality Measure Codes for Consinstency with Subsequent Years
# df_2008$Provider.Number<-as.factor(df_2008$Provider.Number)
# df_2008$Measure.ID<-sub("Heart Attack Patients Given Aspirin at Arrival","AMI_1",df_2008$Measure.Name)
# df_2008$Measure.ID<-sub("Heart Attack Patients Given Fibrinolytic Medication Within 30 Minutes Of Arrival","AMI_7A",df_2008$Measure.ID)
# df_2008$Measure.ID<-sub("Heart Attack Patients Given PCI Within 90 Minutes Of Arrival","AMI_8A",df_2008$Measure.ID)
# df_2008$Measure.ID<-sub("Heart Failure Patients Given ACE Inhibitor or ARB for Left Ventricular Systolic Dysfunction \\(LVSD\\)","HF_2",df_2008$Measure.ID)
# df_2008$Measure.ID<-sub("Surgery Patients Who Received Preventative Antibiotic\\(s\\) One Hour Before Incision","SCIP_1",df_2008$Measure.ID)
# df_2008$Measure.ID<-sub("Surgery Patients Who Received the Appropriate Preventative Antibiotic\\(s\\) for Their Surgery","SCIP_2",df_2008$Measure.ID)
# df_2008$Measure.ID<-sub("Surgery Patients Whose Preventative Antibiotic\\(s\\) are Stopped Within 24 hours After Surgery","SCIP_3",df_2008$Measure.ID)
# df_2008$Measure.ID<-sub("Surgery Patients Who Received Treatment To Prevent Blood Clots Within 24 Hours Before or After Selected Surgeries to Prevent Blood Clots","SCIP_VTE_2",df_2008$Measure.ID)
# df_2008$Measure.ID<-as.factor(df_2008$Measure.ID)
# df_2008$msr_end_yr<-"2008"
# df_2008$Sample_2008<-df_2008$Sample
# df_2008$Score_2008<-df_2008$Score
# df_2008$Score_2008<-sub("%","",df_2008$Score_2008)
# df_2008$Sample_2008<-sub(" patients","",df_2008$Sample_2008)
# #Dropping quality measures not measured in all years from 2008-2016/2015
# df_2008<-subset(df_2008, df_2008$Measure.ID %in% persistent_qual_measures)
# # unique(df_2008$Measure.ID) # Verifying this picked up all 7 codes #
# 
# df_2009<-read.csv("2009.csv")
# df_2009$Measure.Code<-sub("-","_",df_2009$Measure.Code) #fix inconsistent naming convention
# df_2009$Measure.Code<-gsub(" ","",df_2009$Measure.Code) #delete white space
# df_2009$Measure.Code<-sub("SCIP_INF-1","SCIP_1",df_2009$Measure.Code)
# df_2009$Measure.Code<-sub("SCIP_INF-2","SCIP_2",df_2009$Measure.Code)
# df_2009$Measure.Code<-sub("SCIP_INF-3","SCIP_3",df_2009$Measure.Code)
# df_2009$Measure.Code<-sub("SCIP_VTE-2","SCIP_VTE_2",df_2009$Measure.Code)
# df_2009<-subset(df_2009, df_2009$Measure.Code %in% persistent_qual_measures)
# df_2009<-dplyr::rename(df_2009, Measure.ID = Measure.Code)
# df_2009$Measure.ID<-as.factor(df_2009$Measure.ID)
# df_2009$Provider.Number<-as.factor(df_2009$Provider.Number)
# df_2009$Sample_2009<-df_2009$Sample
# df_2009$Score_2009<-df_2009$Score
# df_2009$msr_end_yr<-"2009"
# df_2009$Score_2009<-sub("%","",df_2009$Score_2009)
# df_2009$Sample_2009<-sub(" patients","",df_2009$Sample_2009)
# #unique(df_2009_2$Measure.ID) #
# 
# df_2010<-read.csv("2010.csv")
# df_2010$Measure.Code<-gsub(" ","",df_2010$Measure.Code) #delete white space
# df_2010$Measure.Code<-sub("SCIP_INF_1","SCIP_1",df_2010$Measure.Code)
# df_2010$Measure.Code<-sub("SCIP_INF_2","SCIP_2",df_2010$Measure.Code)
# df_2010$Measure.Code<-sub("SCIP_INF_3","SCIP_3",df_2010$Measure.Code)
# df_2010$Measure.Code<-sub("SCIP_VTE_2","SCIP_VTE_2",df_2010$Measure.Code)
# df_2010$Measure.Code<-sub("AMI_7a","AMI_7A",df_2010$Measure.Code)
# df_2010$Measure.Code<-sub("AMI_8a","AMI_8A",df_2010$Measure.Code)
# df_2010<-subset(df_2010, df_2010$Measure.Code %in% persistent_qual_measures)
# df_2010<-dplyr::rename(df_2010, Measure.ID = Measure.Code)
# #df_2010$Measure.ID<-as.factor(df_2010$Measure.ID)
# df_2010$Provider.Number<-sub("\\'0","",df_2010$Provider.Number) #
# df_2010$Provider.Number<-gsub("\\'","",df_2010$Provider.Number) #
# df_2010$Provider.Number<-as.factor(df_2010$Provider.Number)
# df_2010$msr_end_yr<-"2010"
# df_2010$Score_2010<-df_2010$Score
# df_2010$Sample_2010<-df_2010$Sample
# #unique(df_2010$Measure.Code)
# 
# df_2011<-read.csv("2011.csv")
# df_2011$Measure.Code<-gsub(" ","",df_2011$Measure.Code) #delete white space
# df_2011$Measure.Code<-sub("SCIP_INF_1","SCIP_1",df_2011$Measure.Code)
# df_2011$Measure.Code<-sub("SCIP_INF_2","SCIP_2",df_2011$Measure.Code)
# df_2011$Measure.Code<-sub("SCIP_INF_3","SCIP_3",df_2011$Measure.Code)
# df_2011$Measure.Code<-sub("SCIP_VTE_2","SCIP_VTE_2",df_2011$Measure.Code)
# df_2011$Measure.Code<-sub("AMI_7a","AMI_7A",df_2011$Measure.Code)
# df_2011$Measure.Code<-sub("AMI_8a","AMI_8A",df_2011$Measure.Code)
# df_2011<-subset(df_2011, df_2011$Measure.Code %in% persistent_qual_measures)
# df_2011<-dplyr::rename(df_2011, Measure.ID = Measure.Code)
# df_2011$msr_end_yr<-"2011"
# df_2011$Score_2011<-df_2011$Score
# df_2011$Provider.Number<-sub("\\'0","",df_2011$Provider.Number) #
# df_2011$Provider.Number<-gsub("\\'","",df_2011$Provider.Number) #
# df_2011$Sample_2011<-df_2011$Sample
# #unique(df_2011$Measure.Code)
# 
# df_2012<-read.csv("2012.csv")
# df_2012$Measure.Code<-gsub(" ","",df_2012$Measure.Code) #delete white space
# df_2012$Measure.Code<-sub("SCIP_INF_1","SCIP_1",df_2012$Measure.Code)
# df_2012$Measure.Code<-sub("SCIP_INF_2","SCIP_2",df_2012$Measure.Code)
# df_2012$Measure.Code<-sub("SCIP_INF_3","SCIP_3",df_2012$Measure.Code)
# df_2012$Measure.Code<-sub("SCIP_VTE_2","SCIP_VTE_2",df_2012$Measure.Code)
# df_2012$Measure.Code<-sub("AMI_7a","AMI_7A",df_2012$Measure.Code)
# df_2012$Measure.Code<-sub("AMI_8a","AMI_8A",df_2012$Measure.Code)
# df_2012$Measure.Code<-sub("OP_4","AMI_1",df_2012$Measure.Code)
# df_2012<-subset(df_2012, df_2012$Measure.Code %in% persistent_qual_measures)
# df_2012<-dplyr::rename(df_2012, Measure.ID = Measure.Code)
# df_2012$msr_end_yr<-"2012"
# df_2012$Score_2012<-df_2012$Score
# df_2012$Provider.Number<-sub("\\'0","",df_2012$Provider.Number) #
# df_2012$Provider.Number<-gsub("\\'","",df_2012$Provider.Number) #
# df_2012$Sample_2012<-df_2012$Sample
# #unique(df_2012$Measure.Code)
# 
# df_2013<-read.csv("2013.csv")
# df_2013$Measure.Code<-gsub(" ","",df_2013$Measure.Code) #delete white space
# df_2013$Measure.Code<-sub("SCIP_INF_1","SCIP_1",df_2013$Measure.Code)
# df_2013$Measure.Code<-sub("SCIP_INF_2","SCIP_2",df_2013$Measure.Code)
# df_2013$Measure.Code<-sub("SCIP_INF_3","SCIP_3",df_2013$Measure.Code)
# df_2013$Measure.Code<-sub("SCIP_VTE_2","SCIP_VTE_2",df_2013$Measure.Code)
# df_2013$Measure.Code<-sub("AMI_7a","AMI_7A",df_2013$Measure.Code)
# df_2013$Measure.Code<-sub("AMI_8a","AMI_8A",df_2013$Measure.Code)
# df_2013$Measure.Code<-sub("OP_4","AMI_1",df_2013$Measure.Code)
# df_2013<-subset(df_2013, df_2013$Measure.Code %in% persistent_qual_measures)
# df_2013<-dplyr::rename(df_2013, Measure.ID = Measure.Code)
# df_2013$msr_end_yr<-"2013"
# df_2013$Score_2013<-df_2013$Score
# df_2013$Provider.Number<-sub("\\'0","",df_2013$Provider.Number) #
# df_2013$Provider.Number<-gsub("\\'","",df_2013$Provider.Number) #
# df_2013$Sample_2013<-df_2013$Sample
# # unique(df_2013$Measure.Code)
# 
# df_2014<-read.csv("2014.csv")
# df_2014$Measure.ID<-gsub(" ","",df_2014$Measure.ID) #delete white space
# df_2014$Measure.ID<-sub("SCIP_INF_1","SCIP_1",df_2014$Measure.ID)
# df_2014$Measure.ID<-sub("SCIP_INF_2","SCIP_2",df_2014$Measure.ID)
# df_2014$Measure.ID<-sub("SCIP_INF_3","SCIP_3",df_2014$Measure.ID)
# df_2014$Measure.ID<-sub("SCIP_VTE_2","SCIP_VTE_2",df_2014$Measure.ID)
# df_2014$Measure.ID<-sub("AMI_7a","AMI_7A",df_2014$Measure.ID)
# df_2014$Measure.ID<-sub("AMI_8a","AMI_8A",df_2014$Measure.ID)
# df_2014$Measure.ID<-sub("OP_4","AMI_1",df_2014$Measure.ID)
# df_2014<-subset(df_2014, df_2014$Measure.ID %in% persistent_qual_measures)
# df_2014$msr_end_yr<-"2014"
# df_2014$Score_2014<-df_2014$Score
# df_2014$Provider.Number <- as.character(df_2014$Provider.ID)
# df_2014$Provider.Number <- sub("^0+", "", df_2014$Provider.Number)
# df_2014$Sample_2014<-df_2014$Sample
# #unique(df_2014$Measure.ID)
# 
# df_2015<-read.csv("2015.csv")
# df_2015$Measure.ID<-gsub(" ","",df_2015$Measure.ID) #delete white space
# df_2015$Measure.ID<-sub("SCIP_INF_1","SCIP_1",df_2015$Measure.ID)
# df_2015$Measure.ID<-sub("SCIP_INF_2","SCIP_2",df_2015$Measure.ID)
# df_2015$Measure.ID<-sub("SCIP_INF_3","SCIP_3",df_2015$Measure.ID)
# df_2015$Measure.ID<-sub("SCIP_VTE_2","SCIP_VTE_2",df_2015$Measure.ID)
# df_2015$Measure.ID<-sub("AMI_7a","AMI_7A",df_2015$Measure.ID)
# df_2015$Measure.ID<-sub("AMI_8a","AMI_8A",df_2015$Measure.ID)
# df_2015$Measure.ID<-sub("OP_4","AMI_1",df_2015$Measure.ID)
# df_2015<-subset(df_2015, df_2015$Measure.ID %in% persistent_qual_measures)
# df_2015$msr_end_yr<-"2015"
# df_2015$Score_2015<-df_2015$Score
# df_2015$Provider.Number <- as.character(df_2015$Provider.ID)
# df_2015$Provider.Number <- sub("^0+", "", df_2015$Provider.Number)
# df_2015$Sample_2015<-df_2015$Sample
# # unique(df_2015$Measure.ID)
# 
# ##Joining by provider number and measure ID
# consolidated<-list(df_2008,df_2009,df_2010,df_2011,df_2012,df_2013,df_2014,df_2015) %>%
#   reduce(full_join, by=c("Provider.Number","Measure.ID"))
# 
# ##Keeping subset of importnat rows
# qual_performance<-consolidated[,c("Measure.ID","Measure.Name.y","Provider.Number","Hospital.Name.x","Condition.x","Score_2008","Score_2009","Score_2010","Score_2011","Score_2012","Score_2013","Score_2014","Score_2015","Sample_2008","Sample_2009","Sample_2010","Sample_2011","Sample_2012","Sample_2013","Sample_2014","Sample_2015")]
# 
# ##Dealing with factor variables
# as.numeric.factor <- function(x) {as.numeric(levels(x))[x]}
# 
# qual_performance$Score_2008<-as.numeric(qual_performance$Score_2008)/100
# qual_performance$Score_2009<-as.numeric(qual_performance$Score_2009)/100
# qual_performance$Score_2010<-as.numeric.factor(qual_performance$Score_2010)/100
# qual_performance$Score_2011<-as.numeric.factor(qual_performance$Score_2011)/100
# qual_performance$Score_2012<-as.numeric.factor(qual_performance$Score_2012)/100
# qual_performance$Score_2013<-as.numeric.factor(qual_performance$Score_2013)/100
# qual_performance$Score_2014<-as.numeric.factor(qual_performance$Score_2014)/100
# qual_performance$Score_2015<-as.numeric.factor(qual_performance$Score_2015)/100
# 
# qual_performance$Sample_2008<-as.numeric(qual_performance$Sample_2008)
# qual_performance$Sample_2009<-as.numeric(qual_performance$Sample_2009)
# qual_performance$Sample_2010<-as.numeric.factor(qual_performance$Sample_2010)
# qual_performance$Sample_2011<-as.numeric.factor(qual_performance$Sample_2011)
# qual_performance$Sample_2012<-as.numeric.factor(qual_performance$Sample_2012)
# qual_performance$Sample_2013<-as.numeric.factor(qual_performance$Sample_2013)
# qual_performance$Sample_2014<-as.numeric.factor(qual_performance$Sample_2014)
# qual_performance$Sample_2015<-as.numeric.factor(qual_performance$Sample_2015)
# 
# ##Learning about the missingness
# No_Missingness <- qual_performance %>%
#   group_by(Measure.ID) %>%
#   summarise(non_na_count_08 = sum(!is.na(Score_2008)),
#             non_na_count_09 = sum(!is.na(Score_2009)),
#             non_na_count_10 = sum(!is.na(Score_2010)),
#             non_na_count_11 = sum(!is.na(Score_2011)),
#             non_na_count_12 = sum(!is.na(Score_2012)),
#             non_na_count_13 = sum(!is.na(Score_2013)),
#             non_na_count_14 = sum(!is.na(Score_2014)),
#             non_na_count_15 = sum(!is.na(Score_2015)))
# No_Missingness
# 
# ##Comparing with original Access files/extracted CSV files, missingness appears consinstent with original data
# 
# 
# # Addressing Sample Size Consinstency Issues -- In Later Years
# # CMS Suppressed Qual Measure Reporting for Hospitals w Very Small Numbers of Cases.
# # However, in Early Years, This Was Not Done.  e Need to ID Cut Points Used in Later Years as to Apply to Early Years
# # to Ensure Consinstency
# 
# Sample_Size_Consistency <-
#   qual_performance %>%
#   group_by(Measure.ID) %>%
#   filter(Sample_2013>1) %>%
#   summarise(info=min(Sample_2013))
# Sample_Size_Consistency
# 
# #Finding: from 2013-2015, CMS supprssed results if <11 cases.  But not the case before 2013.
# #Need to make consistent
# 
# #Suppressing all scores corresponding to 11 cases or fewer for all years
# 
# attach(qual_performance)
# 
# scores<-c("Score_2008","Score_2009","Score_2010","Score_2011","Score_2012","Score_2013","Score_2014","Score_2015")
# years<-c("2008","2009","2010","2011","2012","2013","2014","2015")
# 
# for (i in scores){
#   i<-ifelse(i>10,i,NA)
# }
# detach()
# 
# #Creating condition-specific dataframes
# measure<-split(qual_performance, qual_performance$Measure.ID)
# df_AMI_1<-measure$AMI_1
# df_AMI_7<-measure$AMI_7A
# df_AMI_8<-measure$AMI_8A
# df_HF_2<-measure$HF_2
# df_SCIP_1<-measure$SCIP_1
# df_SCIP_2<-measure$SCIP_2
# df_SCIP_3<-measure$SCIP_3
# df_SCIP_VTE_2<-measure$SCIP_VTE_2
# 
# #Measure-by-measure/ data-frame-by-data-frame melting,
# # AND imputation for proviers missing just 1 of 8 years using Amelia with linear time effects
# AMI_1_melted<-melt(data = df_AMI_1, id.vars = c("Provider.Number"),measure.vars=scores, variable.name = "Year",
#                    value.name = "Score")
# Missingness_AMI_1 <- AMI_1_melted %>%
#   group_by(Provider.Number) %>%
#   summarise(All_8_Values = sum(!is.na(Score))) %>%
#   filter(All_8_Values>=7)
# AMI_1_melted<-subset.data.frame(AMI_1_melted,AMI_1_melted$Provider.Number %in% Missingness_AMI_1$Provider.Number)
# AMI_1_melted$Year<-as.numeric(sub("Score_","",AMI_1_melted$Year))
# AMI_1_ameliaed <- amelia(AMI_1_melted, m=1,idvars = "Provider.Number",ts = "Year", polytime= 1)
# AMI_1_melted<-AMI_1_ameliaed$imputations$imp1
# 
# AMI_7_melted<-melt(data = df_AMI_7, id.vars = c("Provider.Number"),measure.vars=scores, variable.name = "Year",
#                    value.name = "Score")
# Missingness_AMI_7 <- AMI_7_melted %>%
#   group_by(Provider.Number) %>%
#   summarise(All_8_Values = sum(!is.na(Score))) %>%
#   filter(All_8_Values>=7)
# AMI_7_melted<-subset.data.frame(AMI_7_melted,AMI_7_melted$Provider.Number %in% Missingness_AMI_7$Provider.Number)
# AMI_7_melted$Year<-as.numeric(sub("Score_","",AMI_7_melted$Year))
# AMI_7_ameliaed <- amelia(AMI_7_melted, m=1,idvars = "Provider.Number",ts = "Year", polytime= 1)
# AMI_7_melted<-AMI_7_ameliaed$imputations$imp1
# 
# AMI_8_melted<-melt(data = df_AMI_8, id.vars = c("Provider.Number"),measure.vars=scores, variable.name = "Year",
#                    value.name = "Score")
# Missingness_AMI_8 <- AMI_8_melted %>%
#   group_by(Provider.Number) %>%
#   summarise(All_8_Values = sum(!is.na(Score))) %>%
#   filter(All_8_Values>=7)
# AMI_8_melted<-subset.data.frame(AMI_8_melted,AMI_8_melted$Provider.Number %in% Missingness_AMI_8$Provider.Number)
# AMI_8_melted$Year<-as.numeric(sub("Score_","",AMI_8_melted$Year))
# AMI_8_ameliaed <- amelia(AMI_8_melted, m=1,idvars = "Provider.Number",ts = "Year", polytime= 1)
# AMI_8_melted<-AMI_8_ameliaed$imputations$imp1
# 
# HF_2_melted<-melt(data = df_HF_2, id.vars = c("Provider.Number"),measure.vars=scores, variable.name = "Year",
#                   value.name = "Score")
# Missingness_HF_2 <- HF_2_melted %>%
#   group_by(Provider.Number) %>%
#   summarise(All_8_Values = sum(!is.na(Score))) %>%
#   filter(All_8_Values>=7)
# HF_2_melted<-subset.data.frame(HF_2_melted,HF_2_melted$Provider.Number %in% Missingness_HF_2$Provider.Number)
# HF_2_melted$Year<-as.numeric(sub("Score_","",HF_2_melted$Year))
# HF_2_ameliaed <- amelia(HF_2_melted, m=1,idvars = "Provider.Number",ts = "Year", polytime= 1)
# HF_2_melted<-HF_2_ameliaed$imputations$imp1
# 
# SCIP_1_melted<-melt(data = df_SCIP_1, id.vars = c("Provider.Number"),measure.vars=scores, variable.name = "Year",
#                     value.name = "Score")
# Missingness_SCIP_1 <- SCIP_1_melted %>%
#   group_by(Provider.Number) %>%
#   summarise(All_8_Values = sum(!is.na(Score))) %>%
#   filter(All_8_Values>=7)
# SCIP_1_melted<-subset.data.frame(SCIP_1_melted,SCIP_1_melted$Provider.Number %in% Missingness_SCIP_1$Provider.Number)
# SCIP_1_melted$Year<-as.numeric(sub("Score_","",SCIP_1_melted$Year))
# SCIP_1_ameliaed <- amelia(SCIP_1_melted, m=1,idvars = "Provider.Number",ts = "Year", polytime= 1)
# SCIP_1_melted<-SCIP_1_ameliaed$imputations$imp1
# 
# SCIP_2_melted<-melt(data = df_SCIP_2, id.vars = c("Provider.Number"),measure.vars=scores, variable.name = "Year",
#                     value.name = "Score")
# Missingness_SCIP_2 <- SCIP_2_melted %>%
#   group_by(Provider.Number) %>%
#   summarise(All_8_Values = sum(!is.na(Score))) %>%
#   filter(All_8_Values>=7)
# SCIP_2_melted<-subset.data.frame(SCIP_2_melted,SCIP_2_melted$Provider.Number %in% Missingness_SCIP_2$Provider.Number)
# SCIP_2_melted$Year<-as.numeric(sub("Score_","",SCIP_2_melted$Year))
# SCIP_2_ameliaed <- amelia(SCIP_2_melted, m=1,idvars = "Provider.Number",ts = "Year", polytime= 1)
# SCIP_2_melted<-SCIP_2_ameliaed$imputations$imp1
# 
# SCIP_3_melted<-melt(data = df_SCIP_3, id.vars = c("Provider.Number"),measure.vars=scores, variable.name = "Year",
#                     value.name = "Score")
# Missingness_SCIP_3 <- SCIP_3_melted %>%
#   group_by(Provider.Number) %>%
#   summarise(All_8_Values = sum(!is.na(Score))) %>%
#   filter(All_8_Values>=7)
# SCIP_3_melted<-subset.data.frame(SCIP_3_melted,SCIP_3_melted$Provider.Number %in% Missingness_SCIP_3$Provider.Number)
# SCIP_3_melted$Year<-as.numeric(sub("Score_","",SCIP_3_melted$Year))
# SCIP_3_ameliaed <- amelia(SCIP_3_melted, m=1,idvars = "Provider.Number",ts = "Year", polytime= 1)
# SCIP_3_melted<-SCIP_3_ameliaed$imputations$imp1
# 
# SCIP_VTE_2_melted<-melt(data = df_SCIP_VTE_2, id.vars = c("Provider.Number"),measure.vars=scores, variable.name = "Year",
#                         value.name = "Score")
# Missingness_SCIP_VTE_2 <- SCIP_VTE_2_melted %>%
#   group_by(Provider.Number) %>%
#   summarise(All_8_Values = sum(!is.na(Score))) %>%
#   filter(All_8_Values>=7)
# SCIP_VTE_2_melted<-subset.data.frame(SCIP_VTE_2_melted,SCIP_VTE_2_melted$Provider.Number %in% Missingness_SCIP_VTE_2$Provider.Number)
# SCIP_VTE_2_melted$Year<-as.numeric(sub("Score_","",SCIP_VTE_2_melted$Year))
# SCIP_VTE_2_ameliaed <- amelia(SCIP_VTE_2_melted, m=1,idvars = "Provider.Number",ts = "Year", polytime= 1)
# SCIP_VTE_2_melted<-SCIP_VTE_2_ameliaed$imputations$imp1
# 
# 
# ###appending DFs one on top of one another
# 
# super_melt<-combine(AMI_1_melted,AMI_8_melted,HF_2_melted,SCIP_1_melted,SCIP_2_melted,SCIP_3_melted,SCIP_VTE_2_melted)
# super_melt$source<-gsub("_melted","",super_melt$source)
# super_melt<-dplyr::rename(super_melt, condition = source)
# super_melt<-dplyr::rename(super_melt, msr_end_yr = Year)
# super_melt<-dplyr::rename(super_melt, rate = Score)
# super_melt$Provider.Number<-ifelse(nchar(super_melt$Provider.Number)<6,paste("0",super_melt$Provider.Number, sep = ""),super_melt$Provider.Number)
# saveRDS(super_melt, file = "qualmeasures.rds")

# Set names of datasets by condition
dataset.list <- c("ami" = "hrrp_hc_did_ami_smpl.dta", 
                  "hf" = "hrrp_hc_did_hf_smpl.dta", 
                  "pn" = "hrrp_hc_did_pn_smpl.dta")

# reading the dta files
ami.dta = read_dta("hrrp_hc_did_ami_smpl.dta")
hf.dta = read_dta("hrrp_hc_did_hf_smpl.dta")
pn.dta = read_dta("hrrp_hc_did_pn_smpl.dta")

# creating a master dataset that contains AMI, HF, and PN
master.dta = rbind(ami.dta,
                   hf.dta,
                   pn.dta)

# Importing the RDS file
qualmeasures = readRDS('qualmeasures.rds')
pvn_numeric = as.numeric(qualmeasures$Provider.Number)

# Converting the pvn in qualmeasures to be numeric
qualmeasures$Provider.Number = as.numeric(qualmeasures$Provider.Number)

# omitting all the NA variables
na.omit(qualmeasures$Provider.Number)

# dropping cols that are about to have duplicated names or prove confusing (jdb) *****
drops <- c('rate','msr_strt_dt','msr_end_dt','condition','measure','postbin','post_treat','postbin_treat','post')
master.dta[ , !(names(master.dta) %in% drops)]


#filtering so we have only one row per year per hospital
master.dta<-master.dta %>% distinct(prvdr_num, msr_end_yr,.keep_all=TRUE)

# merging... on two keys "provider #" and "year"  :-)
colnames(master.dta)[1] = "Provider.Number" # setting the varnames equal

merged.df.final  = merge(master.dta,
                         qualmeasures,
                         by = c("Provider.Number", "msr_end_yr"))

#setting the post value to 12 (months) if msr_end_year>2010, else 0


merged.df.final <- merged.df.final %>%
  mutate(post_treat=ifelse(msr_end_yr>2010,12,0))

merged.df.final$condition.x<-NULL
merged.df.final$measure<-NULL
merged.df.final$rate.x<-NULL


merged.df.final<-dplyr::rename(merged.df.final, prvdr_num = Provider.Number)
merged.df.final<-dplyr::rename(merged.df.final, rate = rate.y)
merged.df.final<-dplyr::rename(merged.df.final, condition = condition.y)

##bookmark

#dropping rows with missing values (we already imputted everything if had missingness in just 7 of 8 years of measurement for given provider id
merged.df.final<-merged.df.final %>% filter(!(is.na(rate)))

# subsetting the df based on conditions
measure<-split(merged.df.final, merged.df.final$condition)

df_AMI_1<-measure$AMI_1
#df_AMI_7<-measure$AMI_7 too few
df_AMI_8<-measure$AMI_8
df_HF_2<-measure$HF_2
df_SCIP_1<-measure$SCIP_1
df_SCIP_2<-measure$SCIP_2
df_SCIP_3<-measure$SCIP_3
df_SCIP_VTE_2<-measure$SCIP_VTE_2


# writing and saving the dta files for each
tmp = tempfile(fileext = ".dta")

# writing the dta files
write_dta(df_AMI_1, "df_AMI_1.dta")
write_dta(df_AMI_8, "df_AMI_8.dta")
write_dta(df_HF_2, "df_HF_2.dta")
write_dta(df_SCIP_1, "df_SCIP_1.dta")
write_dta(df_SCIP_2, "df_SCIP_2.dta")
write_dta(df_SCIP_3, "df_SCIP_3.dta")
write_dta(df_SCIP_VTE_2, "df_SCIP_VTE_2.dta")

# defining a new dataset list
dataset.list <-c("ami_1"="df_AMI_1.dta",
                 "ami_8"="df_AMI_8.dta",
                 "hf_2"="df_HF_2.dta",
                 "scip_1"="df_SCIP_1.dta",
                 "scip_2"="df_SCIP_2.dta",
                 "scip_3"="df_SCIP_3.dta",
                 "scip_vte_2"="df_SCIP_VTE_2.dta")


#####################
# Replicate Table 2 #
###### PLACEBO ######
#####################

#Define scale variable for the impact of the HRRP on readmission rates -- multiply by 12 to convert months to years and by 100 to convert to percentage points
yr.percent <- 12*100

#Calculate results across all conditions for overall (i.e., all covariates at mean) and baseline (i.e., no participation in voluntary programs) contexts
results_table.2 <- data.frame(ami_1 = c(as.character(nrow(read_dta(dataset.list["ami_1"]))),table.present(margins_plm(model, "post_treat", read_dta(dataset.list["ami_1"]),yr.percent, "prvdr_num")),table.present(margins_plm(model, "post_treat", read_dta(dataset.list["ami_1"]), yr.percent, "prvdr_num", covariate_levels[1,]))),
                              
                              "ami_8" = c(as.character(nrow(read_dta(dataset.list["ami_8"]))),table.present(margins_plm(model, "post_treat", read_dta(dataset.list["ami_8"]), yr.percent, "prvdr_num")),  table.present(margins_plm(model, "post_treat", read_dta(dataset.list["ami_8"]), yr.percent, "prvdr_num", covariate_levels[1,]))),
                              
                              "hf_2" = c(as.character(nrow(read_dta(dataset.list["hf_2"]))),table.present(margins_plm(model, "post_treat", read_dta(dataset.list["hf_2"]),yr.percent, "prvdr_num")),  table.present(margins_plm(model, "post_treat", read_dta(dataset.list["hf_2"]), yr.percent, "prvdr_num", covariate_levels[1,]))),
                              
                              "scip_1" = c(as.character(nrow(read_dta(dataset.list["scip_1"]))),table.present(margins_plm(model, "post_treat", read_dta(dataset.list["scip_1"]),yr.percent, "prvdr_num")),  table.present(margins_plm(model, "post_treat", read_dta(dataset.list["scip_1"]), yr.percent, "prvdr_num",covariate_levels[1,]))),
                              
                              "scip_2" = c(as.character(nrow(read_dta(dataset.list["scip_2"]))),table.present(margins_plm(model, "post_treat", read_dta(dataset.list["scip_2"]),yr.percent, "prvdr_num")),  table.present(margins_plm(model, "post_treat", read_dta(dataset.list["scip_2"]), yr.percent, "prvdr_num",covariate_levels[1,]))),
                              
                              "scip_3" = c(as.character(nrow(read_dta(dataset.list["scip_3"]))),table.present(margins_plm(model, "post_treat", read_dta(dataset.list["scip_3"]),yr.percent, "prvdr_num")),  table.present(margins_plm(model, "post_treat", read_dta(dataset.list["scip_3"]), yr.percent, "prvdr_num",covariate_levels[1,]))),
                              
                              "scip_vte_2" = c(as.character(nrow(read_dta(dataset.list["scip_vte_2"]))),table.present(margins_plm(model, "post_treat", read_dta(dataset.list["scip_vte_2"]),    yr.percent, "prvdr_num")),  table.present(margins_plm(model, "post_treat", read_dta(dataset.list["scip_vte_2"]), yr.percent, "prvdr_num",    covariate_levels[1,]))),
                              
                              stringsAsFactors = FALSE)

#Calculate first difference effect on marginal effect of HRRP from switching from baseline (i.e., no participation in voluntary programs) to various combinations of participation in voluntary programs
for(i in 2:nrow(covariate_levels)){
  covariate_level <- covariate_levels[i,]
  results_table.2[i+2,] <- c(table.present(margins_plm_delta(model, "post_treat", read_dta(dataset.list["ami_1"]), yr.percent, "prvdr_num", covariate_levels[1,], covariate_level)),
                             table.present(margins_plm_delta(model, "post_treat", read_dta(dataset.list["ami_8"]), yr.percent, "prvdr_num", covariate_levels[1,], covariate_level)),
                             table.present(margins_plm_delta(model, "post_treat", read_dta(dataset.list["hf_2"]), yr.percent, "prvdr_num", covariate_levels[1,], covariate_level)),
                             table.present(margins_plm_delta(model, "post_treat", read_dta(dataset.list["scip_1"]), yr.percent, "prvdr_num", covariate_levels[1,], covariate_level)),
                             table.present(margins_plm_delta(model, "post_treat", read_dta(dataset.list["scip_2"]), yr.percent, "prvdr_num", covariate_levels[1,], covariate_level)),
                             table.present(margins_plm_delta(model, "post_treat", read_dta(dataset.list["scip_3"]), yr.percent, "prvdr_num", covariate_levels[1,], covariate_level)),
                             table.present(margins_plm_delta(model, "post_treat", read_dta(dataset.list["scip_vte_2"]), yr.percent, "prvdr_num", covariate_levels[1,], covariate_level)))
}

#Package and export results to Latex table
rownames(results_table.2) <- c("Observations", "Overall HRRP Estimate", rownames(covariate_levels))
print(xtable(results_table.2, label = "table2_placebo"), file = "table_2_placebo.tex", floating = FALSE)

##############################
##### Replicate Table 1 ######
##############################

#Calculate Summary Statistics for Each Condition
Table_1 <- sapply(dataset.list, function(i){
  
  #Set condition dataset
  df<-read_dta(i)
  
  #Calculate summary statistics by hospital
  hospital_char<-df %>%
    group_by(prvdr_num) %>%
    summarise(n=n(), rate30d_mean=mean(rate), beds_mean=mean(beds),dshpct_mean=mean(dshpct),
              teach_percent=mean(teach)*100, urban_percent=sum(geo==1|geo==2)/sum(!(is.na(geo))),
              meaningful_use=max(mu2r), aco=max(aco2r), bundles=max(bpci2r))
  
  #Calculate summary statistics across all hospitals
  hospital_char_sum<-hospital_char %>%
    summarise(hospitals=0, observations=0, rate_30_day=mean(rate30d_mean)*100, rate_30_day_sd=0, beds=mean(beds_mean),
              beds_sd=sd(beds_mean), dsh=mean(dshpct_mean), dsh_sd=sd(dshpct_mean),
              teach=mean(teach_percent), urban=mean(urban_percent)*100, vol_initit=0,
              mu=mean(meaningful_use)*100, aco=mean(aco)*100, bundles=mean(bundles)*100
    )
  
  #Calculate standard deviation of readmit rate across hospitals
  hospital_char_sum$rate_30_day_sd<-sd(df$rate)*100
  
  #Count number of hospitals
  hospital_char_sum$hospitals<-sum(!(is.na(hospital_char$prvdr_num)))
  
  #Count number of observations (8 per hospital)
  hospital_char_sum$observations<-hospital_char_sum$hospitals*8
  
  #Return Results
  return(t(round(hospital_char_sum,2)))
})

#Format table for export
Table_1=as.data.frame(Table_1)
Table_1[11,]=""
Characteristic_names<-c("Hospitals, No", "Hospital-year observations, No", "30-day risk standardized observations rate, mean", "30-day risk standardized observations rate, sd", "Beds, mean", "Beds, sd", "Disproportionate Share Index, mean", "Disproportionate Share Index, sd", "Teaching hospital, %", "Hospital located in urban area, %", "Hospital participating during any time in study period, %", "Meaningful use of health information technology", "Accountable care organization programs", "Bundled Payment for Care Initiative")
Table_1<-add_column(Table_1, Characteristic_names, .before = "ami_1")
#Table_1 <-Table_1 %>% 
#  rename(AMI = ami, 'Heart Failure'=hf, Pneumonia=pn)

#Export table to Latex
print(xtable(Table_1, label = "table1_placebo"), file = "table_1_placebo.tex", floating = FALSE)
